In [1]:
import json
import pprint
import numpy as np
# Relative imports do not work at the notebook top level; assuming the project
# helpers used below (dataset, get_temporal_predictions_2) live in src/data.py
from src.data import dataset, get_temporal_predictions_2
In [29]:
def import_labels(f):
    ''' Read all the labels from a file with one tab-separated
    `index\tlabel` pair per line, ordered by index. '''
    lines = f.readlines()
    labels = []
    for i, l in enumerate(lines):
        t = l.split('\t')
        assert int(t[0]) == i
        label = t[1].split('\n')[0]
        labels.append(label)
    return labels
def to_categorical(y, nb_classes=None):
    ''' Convert a class vector (integers from 0 to nb_classes) to a binary
    class matrix, for use with categorical_crossentropy.
    '''
    if nb_classes is None:
        nb_classes = np.max(y) + 1
    Y = np.zeros((len(y), nb_classes))
    for i in range(len(y)):
        Y[i, y[i]] = 1.
    return Y
def generate_output(video_info, labels, length=16):
    ''' Given the info of the video, generate a vector of classes
    corresponding to the output for each clip of the video whose
    features have been extracted.
    '''
    nb_frames = video_info['num_frames']
    last_first_frame = nb_frames - length + 1
    start_frames = range(0, last_first_frame, length)

    # Find the label for each frame of the video
    outputs = ['none'] * nb_frames
    for i in range(nb_frames):
        # Map the frame index to the temporal scale of the video
        t = i / float(nb_frames) * video_info['duration']
        for annotation in video_info['annotations']:
            if annotation['segment'][0] <= t <= annotation['segment'][1]:
                outputs[i] = annotation['label']
                break

    # Obtain the label for each instance (clip) and then its output: a clip
    # is assigned its majority label only if it covers at least half of the
    # clip's frames; otherwise the clip is treated as background (class 0)
    instances = []
    for start_frame in start_frames:
        outs = outputs[start_frame:start_frame + length]
        label = max(set(outs), key=outs.count)
        if label != 'none' and outs.count(label) >= length / 2:
            instances.append(labels.index(label))
        else:
            instances.append(0)
    return instances
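As a quick sanity check of the two helpers above, a toy run on a synthetic `video_info` (hypothetical values, not taken from the dataset):
In [ ]:
# 48 frames over 4.8 s, one 'Walking' annotation spanning the first 2 s;
# with 16-frame clips this yields 3 clips
toy_labels = ['none', 'Walking']
toy_info = {
    'num_frames': 48,
    'duration': 4.8,
    'annotations': [{'label': 'Walking', 'segment': [0.0, 2.0]}],
}
toy_instances = generate_output(toy_info, toy_labels, length=16)
print(toy_instances)  # -> [1, 0, 0]: only the first clip is mostly 'Walking'
print(to_categorical(toy_instances, nb_classes=2))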
In [5]:
with open("../dataset/labels.txt", "r") as f:
labels = import_labels(f)
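The labels file is expected to hold one tab-separated `index\tlabel` pair per line; a minimal in-memory example (illustrative values only):
In [ ]:
import io
# Indices must be consecutive and start at 0, as asserted by import_labels
print(import_labels(io.StringIO('0\tnone\n1\tWalking\n2\tRunning\n')))
# -> ['none', 'Walking', 'Running']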
In [10]:
with open("../dataset/videos.json", "r") as f:
videos_info = json.load(f)
video_info = videos_info['Uw_0h2UrfyY']
print(video_info)
In [31]:
instances = generate_output(video_info, labels, length=16)
print(instances)
Y = to_categorical(instances, nb_classes=200)
print(Y[100:300])
Below, the same test repeated with the previously implemented code from the project.
In [2]:
video = None
for v in dataset.get_subset_videos('validation'):
    if v.video_id == 'Uw_0h2UrfyY':
        video = v
        break
print(video.serialize())
In [3]:
video.get_video_instances(16, 0)
ground_truth = np.array([instance.output for instance in video.instances])
print(ground_truth)
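Both code paths compute the clip-level ground truth for the same video, so they should agree; a quick consistency check (assuming the cells above were run in order):
In [ ]:
# The standalone generate_output helper and the Video class should match
print(np.array_equal(ground_truth, np.array(instances)))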
In [4]:
detection_prediction_path = detection_predictions_path + video.video_id + '.npy'
classification_prediction_path = classification_predictions_path + video.video_id + '.npy'
class_prediction = np.load(classification_prediction_path)
detection_prediction = np.load(detection_prediction_path)
print(class_prediction)
print(detection_prediction)
In [5]:
# Keep the classification output only where the detector fired;
# everything else stays background (class 0)
mix = np.zeros(class_prediction.shape, dtype=np.int64)
for pos in range(class_prediction.size):
    if detection_prediction[pos] == 1:
        mix[pos] = class_prediction[pos]
print(mix)
prediction = get_temporal_predictions_2(mix, fps=video.fps)
pprint.pprint(prediction)
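`get_temporal_predictions_2` comes from the project sources. As a rough sketch of the idea (an assumption, not the actual implementation), it can be pictured as merging runs of identical non-background clip predictions into temporal segments, converting clip indices to seconds through the video fps:
In [ ]:
def temporal_predictions_sketch(clip_preds, fps, clip_length=16):
    ''' Minimal sketch: merge runs of identical non-zero clip predictions
    into {'label', 'start', 'end'} segments, with times in seconds. '''
    segments = []
    start, current = 0, 0
    for i, c in enumerate(list(clip_preds) + [0]):  # sentinel flushes the last run
        if c != current:
            if current != 0:
                segments.append({'label': int(current),
                                 'start': start * clip_length / fps,
                                 'end': i * clip_length / fps})
            start, current = i, c
    return segments

pprint.pprint(temporal_predictions_sketch(mix, fps=video.fps))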